Step 0: Prepare covariates and input files

IDs <- fread("~/genotype_qc/TERRE_QC/all_imputed_r2_30_rsid_hard_call.fam")[, .(FID = V1, IID = V2)]
prsice_cov <- fread("prsice_cov_and_status_mvalues.txt")
prsice_cov <- prsice_cov[match(IDs$IID,prsice_cov$IID)]
all(IDs$IID ==prsice_cov$IID)
[1] NA
covariate <- cbind(IDs, prsice_cov[,-c(16,17,18)])

PD <- cbind(IDs,prsice_cov[,16])

head(covariate)
head(PD)
fwrite(na.omit(PD), "TERRE.pheno", sep = "\t")
fwrite(na.omit(covariate), "TERRE.covariate", sep = "\t")
covariate

Step 1: Run PRSice-2 on Nalls et al 2019 Sumstats

Rscript /home1/NEURO/casazza/PRSice.R \
    --prsice /home1/NEURO/casazza/PRSice_linux\
    --base /home1/NEURO/casazza/nalls_PD.QC.gz\
    --base-info INFO:0.8 \
    --base-maf MAF:0.01 \
    --cov TERRE.covariate \
    --binary-target T\
    --beta  \
    --ld /home1/NEURO/casazza/1000G_plink/EUR_phase3  \
    --out TERRE_PRSice \
    -q 5\
    --all-score\
    --pheno TERRE.pheno \
    --snp SNP \
    --stat b \
    --pvalue p\
    --target /home1/NEURO/casazza/genotype_qc/TERRE_QC/all_imputed_r2_30_rsid_hard_call \
    --thread 32

Step 2: Evaluate output

include_graphics("prsice_images/TERRE_PRSice_BARPLOT_2022-06-30.png")

include_graphics("prsice_images/TERRE_PRSice_HIGH-RES_PLOT_2022-06-30.png")

include_graphics("prsice_images/TERRE_PRSice_QUANTILES_PLOT_2022-06-30.png")


include_graphics("prsice_images/TERRE_PRSice_nalls_male_BARPLOT_2022-10-04.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_male_HIGH-RES_PLOT_2022-10-04.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_male_QUANTILES_PLOT_2022-10-04.png")


include_graphics("prsice_images/TERRE_PRSice_nalls_female_BARPLOT_2022-10-04.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_female_HIGH-RES_PLOT_2022-10-04.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_female_QUANTILES_PLOT_2022-10-04.png")

Plotting PRSice Data on my own

library(ggnewscale)
prsice_male_meta <- fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.prsice")
ggplot(prsice_male_meta[Threshold <= 0.5], aes(Threshold, Num_SNP, color = -log10(P))) +
  geom_point() +
  scale_y_continuous(breaks = c(seq(0, 1e5, 2.5e4), seq(2e5, 6e5, 1e5))) +
  theme_minimal()


prsice_female_meta <- fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.prsice")
ggplot(prsice_female_meta[Threshold <= 0.5], aes(Threshold, Num_SNP, color = -log10(P))) +
  geom_point() +
  scale_y_continuous(breaks = c(seq(0, 1e5, 2.5e4), seq(2e5, 6e5, 1e5))) +
  theme_minimal()


prsice_meta <- fread("prsice_data/TERRE_PRSice.prsice")
ggplot(mapping = aes(Threshold, R2, color = -log10(P))) +
  geom_point(data = prsice_female_meta, size = 1) +
  scale_color_gradient(low = "lightpink4", high = "lightpink") +
  labs(color = bquote("Female log"["10"] ~ "(P)")) +
  new_scale_color() +
  geom_point(data = prsice_male_meta, size = 1, aes(color = -log10(P))) +
  scale_color_gradient(low = "lightblue4", high = "lightblue") +
  labs(y = bquote("R"^2), x = "GWAS P-Value Threshold", color = bquote("Male -log"["10"] ~ "(P)")) +
  theme_minimal() +
  theme(legend.position = "top")

ggplot(mapping = aes(Threshold, R2, color = -log10(P))) +
  geom_point(data = prsice_female_meta, size = 1) +
  scale_color_gradient(low = "lightpink4", high = "lightpink",guide = guide_colorbar(order=3)) +
  labs(color = bquote("Female -log"["10"] ~ "(P)")) +
  new_scale_color() +
  geom_point(data = prsice_male_meta, size = 1, aes(color = -log10(P))) +
  scale_color_gradient(low = "lightblue4", high = "lightblue",guide = guide_colorbar(order=2)) +
  labs(color = bquote("Male -log"["10"] ~ "(P)")) +
  new_scale_color() +
  geom_point(data = prsice_meta, size = 1, aes(color = -log10(P))) +
  scale_color_gradient(low = "gray40", high = "gray80",guide = guide_colorbar(order=1)) +
  labs(y = bquote("R"^2), x = "GWAS P-Value Threshold", color = bquote("Cross-sex -log"["10"] ~ "(P)")) +
  theme_minimal() +
  theme(legend.position = "top",legend.title = element_text(size=7))

Step 3 run linear model at different thresholds for SNP inclusion

This is updated to recently processed DNAm Data 20 outliers in DNAm removed:

load("/home1/NEURO/SHARE_DECIPHER/processed_DNAm_data/2022/TERRE_processed_2022/1-TERRE_RG_filtered.RData") #PD_RG_filtered
# Assign genotyping ID to data
original_covars <- fread("/home1/NEURO/SHARE_DECIPHER/terre_meta_master.csv")[, .(patient, IID = gsub("_PAE.*", "", IID))]
betas_combat <- minfi::getBeta(PD_RG_filtered)
colnames(betas_combat) <- original_covars$IID[match(colnames(betas_combat), original_covars$patient)]
betas_combat <- betas_combat[, colnames(betas_combat) %in% covariate$IID]

Let’s check how the data looks for the first 5 subjects:

ggplot(betas_combat[, 1:5] %>% as.data.table(keep.rownames = T) %>% melt(id.vars = "rn", value.name = "betas", variable.name = "subject"), aes(betas, color = subject))+
  geom_density()

Match DNA, PRS, and metadata @TODO fix this after update of PRS etc.

prsice_best <- fread("prsice_data/TERRE_PRSice.best")[match(colnames(betas_combat), IID,nomatch=0),.(best=PRS)]
prsice_all <- cbind(
  fread("prsice_data/TERRE_PRSice.all_score")[match(colnames(betas_combat), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
  prsice_best
)
covariate <- covariate[match(colnames(betas_combat), IID)]
all(covariate$IID == colnames(betas_combat))
[1] TRUE
all(covariate$IID == prsice_all$IID)
[1] TRUE
covariate_male <- covariate[sex == 1] %>% select(-sex)
betas_male <- betas_combat[, covariate_male$IID]
prsice_male_best <- fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.best")[match(colnames(betas_male), IID,nomatch=0),.(best=PRS)]
prsice_male_all <- cbind(
  fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.all_score")[match(colnames(betas_male), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
  prsice_male_best
)
covariate_male <- covariate_male[match(colnames(betas_male), IID,nomatch=0)]
all(covariate_male$IID == colnames(betas_male))
[1] TRUE
all(covariate_male$IID == prsice_male_all$IID)
[1] TRUE
prsice_female_best <- fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.best")
covariate_female <- covariate[sex == 0] %>% select(-sex) %>% filter(IID %in% prsice_female_best$IID)
betas_female <- betas_combat[, covariate_female$IID]
prsice_female_best <- prsice_female_best[match(colnames(betas_female), IID,nomatch=0),.(best=PRS)]
prsice_female_all <- cbind(
  fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.all_score")[match(colnames(betas_female), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
  prsice_female_best
)
covariate_female <- covariate_female[match(colnames(betas_female), IID,nomatch=0)]
all(covariate_female$IID == colnames(betas_female))
[1] TRUE
all(covariate_female$IID == prsice_female_all$IID)
[1] TRUE

Run limma

mvalues <- lumi::beta2m(betas_combat)
prs_mat <- prsice_all[, -c(1, 2)]
cov_mat <- covariate[, -c(1, 2)]

mvalues_male <- lumi::beta2m(betas_male)
prs_mat_male <- prsice_male_all[, -c(1, 2)]
cov_mat_male <- covariate_male[, -c(1, 2)]

mvalues_female <- lumi::beta2m(betas_female)
prs_mat_female <- prsice_female_all[, -c(1, 2)]
cov_mat_female <- covariate_female[, -c(1, 2)]
registerDoParallel(ncol(prs_mat) / 4)
hits <- foreach(prs_thresh = colnames(prs_mat)) %dopar% {
  design_prs <- model.matrix(~., data = cbind(prs_mat[, ..prs_thresh], cov_mat))
  prs_fit <- lmFit(mvalues, design_prs)
  prs_fit <- eBayes(prs_fit)
  topTable(prs_fit, coef = 2, adjust.method = "bonf", p.value = 0.05, number = Inf, genelist = rownames(mvalues))
}

Plotting EWAS vs Threshold Experiment by Sex

to_plot <- rbind(
  hits_by_thresh_bonf[, .(hits = .N, Sex = "Cross-sex"), by = threshold] %>%
    mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0")),
  hits_by_thresh_bonf_male[, .(hits = .N, Sex = "Male"), by = threshold] %>%
    mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0")),
  hits_by_thresh_bonf_female[, .(hits = .N, Sex = "Female"), by = threshold] %>%
    mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0"))
) %>% mutate(Sex = factor(Sex, levels = c("Cross-sex", "Male", "Female")))
plot_pos <- position_dodge(width = 1)
ggplot(to_plot, aes(threshold, hits, fill = Sex, label = hits)) +
  geom_text(position = plot_pos, vjust = -0.25) +
  geom_col(position = plot_pos) +
  labs(x = "GWAS P Value Threshold", y = "EWAS Hits") +
  scale_fill_manual(values = c("grey80", "lightblue", "lightpink")) +
  theme_minimal()

hits_by_thresh_bonf[, .(hits = .N, Sex = "Cross-sex"), by = threshold]
hits_by_thresh_bonf_male[, .(hits = .N), by = threshold]
hits_by_thresh_bonf_female[, .(hits = .N), by = threshold]
display_venn <- function(x, ...) {
  library(VennDiagram)
  grid.newpage()
  venn_object <- venn.diagram(x, filename = NULL, ...)
  grid.draw(venn_object)
}

display_venn(list(`Cross-sex` = hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID, Male = hits_by_thresh_bonf_male[threshold == "Pt_5e-08"]$ID, Female = hits_by_thresh_bonf_female[threshold == "Pt_5e-08"]$ID), fill = c("gray80", "lightblue", "lightpink"))
Loading required package: grid
Loading required package: futile.logger

male_ids <- hits_by_thresh_bonf_male[threshold == "Pt_5e-08"]$ID
cross_ids <- hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID
male_ids[!male_ids %in% cross_ids]
character(0)
get_full_fit <- function(prs_mat,cov_mat,mvalues){
  top_design_prs <- model.matrix(~., data = cbind(prs_mat[, `Pt_5e-08`], cov_mat))
  top_prs_fit <- lmFit(mvalues, top_design_prs)
  top_prs_fit <- eBayes(top_prs_fit)
  top_prs_hits <- topTable(top_prs_fit, coef = 2, adjust.method = "bonf", number = Inf, genelist = rownames(mvalues))
}
top_prs_hits <- get_full_fit(prs_mat,cov_mat,mvalues)
top_male_prs_hits <- get_full_fit(prs_mat_male, cov_mat_male, mvalues_male)
top_female_prs_hits <- get_full_fit(prs_mat_female, cov_mat_female, mvalues_female)
save(list=c("top_prs_hits","top_male_prs_hits","top_female_prs_hits"),file="prs_nalls_cross_w_sex_stratified.RData")
load("prs_nalls_cross_w_sex_stratified.RData")
manifest <- IlluminaHumanMethylationEPICanno.ilm10b4.hg19::Other %>%
  as.data.frame() %>%
  rownames_to_column(var = "name")
prs_annot <- data.table(top_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
prs_annot_male <- data.table(top_male_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
prs_annot_female<- data.table(top_female_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
plot_prs_hits <- function(prs_annot,label_color){
  ggplot(prs_annot, aes(logFC, -log10(P.Value))) +
    geom_point() +
    geom_point(
      data = subset(prs_annot, adj.P.Val < 0.05 & abs(logFC) > 0.03),
      color = label_color,
      mapping = aes(logFC, -log10(P.Value))
    ) +
    geom_hline(
      linetype = "dashed",
      yintercept = min(-log10(prs_annot$P.Value[prs_annot$adj.P.Val < 0.05]))
    ) +
    geom_vline(linetype = "dashed", xintercept = 0.03) +
    geom_vline(linetype = "dashed", xintercept = -0.03) +
    geom_text_repel(
      data = prs_annot %>% filter(abs(logFC) > 0.03 & adj.P.Val < 0.05),
      color = "dodgerblue",
      mapping = aes(logFC, -log10(P.Value), label = ifelse(gene != "", gene, ID)),
      size = 3,
      max.overlaps = 20
    ) +
    labs(y = bquote("log"[10] ~ "(P)"), x = quote(Delta ~ "M" ~ Methylation)) +
    theme_minimal()
}

plot_prs_hits(prs_annot,"gray40")

plot_prs_hits(prs_annot_male,"lightblue")

plot_prs_hits(prs_annot_female,"pink")

go enrichment?

library(gprofiler2)
cross_sex <- unique(gsub(";.*","",manifest[manifest$name %in% hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID,]$UCSC_RefGene_Name))
background <- unique(gsub(";.*","",manifest$UCSC_RefGene_Name))
gost_res <- gost(query=cross_sex,custom_bg = background)
gostplot(gost_res)
gmirror(
  prs_annot_male[,.(SNP=ID,CHR=gsub("chr","",cpg_pos[match(ID,cpg_pos$name),]$chr),POS=cpg_pos[match(ID,cpg_pos$name),]$pos,pvalue=P.Value)],
  prs_annot_female[,.(SNP=ID,CHR=gsub("chr","",cpg_pos[match(ID,cpg_pos$name),]$chr),POS=cpg_pos[match(ID,cpg_pos$name),]$pos,pvalue=P.Value)],
  annotate_snp = c(male_annot_cpg,female_annot_cpg),
  tline =  max(prs_annot_male[adj.P.Val<0.25]$P.Value),
  bline =  max(prs_annot_female[adj.P.Val<0.25]$P.Value),
  highlight_p = c(max(prs_annot_male[adj.P.Val<0.25]$P.Value),max(prs_annot_female[adj.P.Val<0.25]$P.Value)),
  toptitle="Male",
  bottomtitle ="Female",
  highlighter="green",
  background="white"
)
[1] "Saving plot to gmirror.png"
TableGrob (2 x 1) "arrange": 2 grobs

Manhattan plot vs GWAS manhattan plot

library(qqman)

copy_annot <- prs_annot[cpg_pos, on = c(ID = "name")] %>%
  mutate(chr = as.numeric(recode(gsub("chr", "", chr),X="23",Y="24")))
to_plot <- copy_annot[, .(SNP = gene, CHR = chr, BP = pos, P = P.Value, FDR = adj.P.Val)][!is.na(P)]
manhattan(to_plot,
  annotatePval = max(to_plot[FDR < 0.05]$P),
  annotateTop = TRUE,
  genomewideline = min(-log10(to_plot[FDR < 0.05]$P)),
  suggestiveline = FALSE
)
manhattan(to_plot[CHR == 17],
  annotatePval = max(to_plot[FDR < 0.05]$P),
  annotateTop = FALSE,
  genomewideline = min(-log10(to_plot[FDR < 0.05]$P)),
  suggestiveline = FALSE
)
qq(to_plot$P)

DMRs

library(DMRcate)
Loading required package: minfi
Loading required package: BiocGenerics

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply,
    parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB

The following object is masked from ‘package:limma’:

    plotMA

The following objects are masked from ‘package:dplyr’:

    combine, intersect, setdiff, union

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq,
    Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax,
    pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply, union,
    unique, unsplit, which, which.max, which.min

Loading required package: GenomicRanges
Loading required package: stats4
Loading required package: S4Vectors

Attaching package: ‘S4Vectors’

The following objects are masked from ‘package:data.table’:

    first, second

The following objects are masked from ‘package:dplyr’:

    first, rename

The following object is masked from ‘package:tidyr’:

    expand

The following object is masked from ‘package:base’:

    expand.grid

Loading required package: IRanges

Attaching package: ‘IRanges’

The following object is masked from ‘package:data.table’:

    shift

The following objects are masked from ‘package:dplyr’:

    collapse, desc, slice

The following object is masked from ‘package:purrr’:

    reduce

Loading required package: GenomeInfoDb
Loading required package: SummarizedExperiment
Loading required package: Biobase
Welcome to Bioconductor

    Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.

Loading required package: DelayedArray
Loading required package: matrixStats

Attaching package: ‘matrixStats’

The following objects are masked from ‘package:Biobase’:

    anyMissing, rowMedians

The following object is masked from ‘package:dplyr’:

    count

Loading required package: BiocParallel

Attaching package: ‘DelayedArray’

The following objects are masked from ‘package:matrixStats’:

    colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges

The following object is masked from ‘package:purrr’:

    simplify

The following objects are masked from ‘package:base’:

    aperm, apply, rowsum

Loading required package: Biostrings
Loading required package: XVector

Attaching package: ‘XVector’

The following object is masked from ‘package:purrr’:

    compact


Attaching package: ‘Biostrings’

The following object is masked from ‘package:base’:

    strsplit

Loading required package: bumphunter
Loading required package: locfit
locfit 1.5-9.4   2020-03-24

Attaching package: ‘locfit’

The following object is masked from ‘package:purrr’:

    none

replacing previous import 'minfi::getMeth' by 'bsseq::getMeth' when loading 'DMRcate'
S4_to_dataframe <- function(s4obj) {
  nms <- slotNames(s4obj)
  lst <- lapply(nms, function(nm) slot(s4obj, nm))
  as.data.frame(setNames(lst, nms))
}
run_dmrcate <- function(prs_mat,cov_mat,mvalues){
  design_prs <- model.matrix(~., data = cbind(prs_mat[, `Pt_5e-08`], cov_mat))
  prs_annotated <- cpg.annotate(datatype = "array", object = mvalues, analysis.type = "differential", design = design_prs, coef = 2, what = "M", arraytype = "EPIC", fdr = 0.05)
  prs_dmr_res <- dmrcate(prs_annotated, lambda = 1000, C = 2)
  return(S4_to_dataframe(prs_dmr_res))
}
dmr_cross <- run_dmrcate(prs_mat, cov_mat, mvalues)
Loading required package: IlluminaHumanMethylationEPICanno.ilm10b4.hg19
Your contrast returned 85 individually significant probes; a small but real effect. Consider manually setting the value of pcutoff to return more DMRs, but be warned that doing this increases the risk of Type I errors.
Fitting chr1...
Fitting chr2...
Fitting chr3...
Fitting chr4...
Fitting chr5...
Fitting chr6...
Fitting chr7...
Fitting chr8...
Fitting chr9...
Fitting chr10...
Fitting chr11...
Fitting chr12...
Fitting chr13...
Fitting chr14...
Fitting chr15...
Fitting chr16...
Fitting chr17...
Fitting chr18...
Fitting chr19...
Fitting chr20...
Fitting chr21...
Fitting chr22...
Fitting chrX...
Fitting chrY...
Demarcating regions...
Done!
dmr_males <- run_dmrcate(prs_mat_male,cov_mat_male,mvalues_male)
Your contrast returned 59 individually significant probes; a small but real effect. Consider manually setting the value of pcutoff to return more DMRs, but be warned that doing this increases the risk of Type I errors.
Fitting chr1...
Fitting chr2...
Fitting chr3...
Fitting chr4...
Fitting chr5...
Fitting chr6...
Fitting chr7...
Fitting chr8...
Fitting chr9...
Fitting chr10...
Fitting chr11...
Fitting chr12...
Fitting chr13...
Fitting chr14...
Fitting chr15...
Fitting chr16...
Fitting chr17...
Fitting chr18...
Fitting chr19...
Fitting chr20...
Fitting chr21...
Fitting chr22...
Fitting chrX...
Fitting chrY...
Demarcating regions...
Done!
dmr_female <- run_dmrcate(prs_mat_female,cov_mat_female,mvalues_female)
Your contrast returned 41 individually significant probes; a small but real effect. Consider manually setting the value of pcutoff to return more DMRs, but be warned that doing this increases the risk of Type I errors.
Fitting chr1...
Fitting chr2...
Fitting chr3...
Fitting chr4...
Fitting chr5...
Fitting chr6...
Fitting chr7...
Fitting chr8...
Fitting chr9...
Fitting chr10...
Fitting chr11...
Fitting chr12...
Fitting chr13...
Fitting chr14...
Fitting chr15...
Fitting chr16...
Fitting chr17...
Fitting chr18...
Fitting chr19...
Fitting chr20...
Fitting chr21...
Fitting chr22...
Fitting chrX...
Fitting chrY...
Demarcating regions...
Done!
annotation <- minfi::getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)

Plotting all DMRs

get_dmr_effects <- function(dmr, limma_res,mvalues) {
  dmr_coord <- str_match_all(as.character(dmr), "(chr.*):([0-9]*)-([0-9]*)")[[1]]
  cpgs <- as.data.table(annotation[annotation$chr == dmr_coord[2] & annotation$pos >= as.numeric(dmr_coord[3]) & annotation$pos <= as.numeric(dmr_coord[4]), ])
  res <- limma_res[cpgs,on=c("ID"="Name"),nomatch=0 ]
  dmr_1 <- as.data.frame(res[!is.na(res$logFC), ])
  dmr_methy <- reshape2::melt(lumi::m2beta(mvalues[dmr_1$ID, ]), stringsAsFactors = FALSE)
  to_plot <- merge(dmr_1, dmr_methy, by.x = "ID", by.y = "Var1")
  to_plot$DMR <- dmr
  to_plot
}
get_dmr_res <- function(dmr, limma_res) {
  dmr_coord <- str_match_all(as.character(dmr), "(chr.*):([0-9]*)-([0-9]*)")[[1]]
  cpgs <- annotation[annotation$chr == dmr_coord[2] & annotation$pos >= as.numeric(dmr_coord[3]) & annotation$pos <= as.numeric(dmr_coord[4]), ]
  res <- cbind(cpgs, limma_res[cpgs$Name, ])
  res$DMR <- dmr
  return(res)
}
plot_dmrs <- function(dmr_res,limma_res,prs_mat, mvalues, case_control) {
  to_plot <- rbindlist(lapply(dmr_res$coord, function(dmr) get_dmr_effects(dmr, limma_res,mvalues))) %>%
    mutate(SCORE1_AVG = scale(prs_mat[match(Var2,IDs$IID),`Pt_5e-08`]),PD = PD[match(Var2,IID)]$PD) %>%
    filter(!is.na(SCORE1_AVG))
  for(cur_dmr in unique(to_plot$DMR)){
    cur_plot <- to_plot[DMR == cur_dmr]
    p1 <- ggplot(cur_plot , aes(pos, value, color = SCORE1_AVG)) +
      geom_point() +
      scale_color_gradient(low = rev(case_control)[1], high = rev(case_control)[2]) +
      theme_minimal() +
      labs(title = unique(cur_plot$DMR),x="POS",y=bquote("Methylation"~beta),color = "Normalized PD PRS")+
      theme(axis.text.x = element_text(angle = 90))
    p2 <- ggplot(cur_plot %>% mutate(PD = ifelse(PD == 1, "CASE", "CONTROL")), aes(factor(pos), value, color = PD)) +
      geom_boxplot(position = position_dodge(0.75)) +
      scale_color_manual(values = case_control) +
      theme_minimal() +
      stat_summary(aes(group = PD), fun = mean, geom = "line") +
      labs(title = unique(cur_plot$DMR),x="POS",y=bquote("Methylation"~beta),color = "PD status") +
      theme(axis.text.x = element_text(angle = 90))
    print(p1)
    print(p2)
  }
}

plot_dmrs(dmr_cross, prs_annot, prs_mat,mvalues, rev(c("gray80", "gray40")))

plot_dmrs(dmr_males,prs_annot_male,prs_mat_male, mvalues_male,rev(c("light blue", "lightblue4")))

plot_dmrs(dmr_female, prs_annot_female, prs_mat_female, mvalues_female,rev(c("lightpink", "lightpink4")))

dmr_cross
dmr_males
dmr_female
save(list=c("dmr_cross","dmr_males","dmr_female"),file="prs_dmr_nalls.RData")
---
title: "PRSice2 Development of risk score"
output:
  html_notebook:
    toc: yes
---

```{r setup, include=FALSE}
library(tidyverse)
library(ggrepel)
library(data.table)
library(knitr)
library(limma)
library(foreach)
library(doParallel)
Sys.setlocale("LC_MESSAGES", "en_US.utf8")
knitr::opts_chunk$set(echo = TRUE)
```

# Step 0: Prepare covariates and input files
```{r}
IDs <- fread("~/genotype_qc/TERRE_QC/all_imputed_r2_30_rsid_hard_call.fam")[, .(FID = V1, IID = V2)]
prsice_cov <- fread("prsice_cov_and_status_mvalues.txt")
prsice_cov <- prsice_cov[match(IDs$IID,prsice_cov$IID)]
all(IDs$IID ==prsice_cov$IID)
covariate <- cbind(IDs, prsice_cov[,-c(16,17,18)])

PD <- cbind(IDs,prsice_cov[,16])

head(covariate)
head(PD)
fwrite(na.omit(PD), "TERRE.pheno", sep = "\t")
fwrite(na.omit(covariate), "TERRE.covariate", sep = "\t")
```
```{r}
covariate
```


# Step 1: Run PRSice-2 on Nalls et al 2019 Sumstats
```{bash,eval=FALSE}
Rscript /home1/NEURO/casazza/PRSice.R \
    --prsice /home1/NEURO/casazza/PRSice_linux\
    --base /home1/NEURO/casazza/nalls_PD.QC.gz\
    --base-info INFO:0.8 \
    --base-maf MAF:0.01 \
    --cov TERRE.covariate \
    --binary-target T\
    --beta  \
    --ld /home1/NEURO/casazza/1000G_plink/EUR_phase3  \
    --out TERRE_PRSice \
    -q 5\
    --all-score\
    --pheno TERRE.pheno \
    --snp SNP \
    --stat b \
    --pvalue p\
    --target /home1/NEURO/casazza/genotype_qc/TERRE_QC/all_imputed_r2_30_rsid_hard_call \
    --thread 32
```
# Step 2: Evaluate output
```{r, out.width="400px"}
include_graphics("prsice_images/TERRE_PRSice_BARPLOT_2022-06-30.png")
include_graphics("prsice_images/TERRE_PRSice_HIGH-RES_PLOT_2022-06-30.png")
include_graphics("prsice_images/TERRE_PRSice_QUANTILES_PLOT_2022-06-30.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_male_BARPLOT_2022-10-04.png")
include_graphics("prsice_images/TERRE_PRSice_nalls_male_HIGH-RES_PLOT_2022-10-04.png")
include_graphics("prsice_images/TERRE_PRSice_nalls_male_QUANTILES_PLOT_2022-10-04.png")

include_graphics("prsice_images/TERRE_PRSice_nalls_female_BARPLOT_2022-10-04.png")
include_graphics("prsice_images/TERRE_PRSice_nalls_female_HIGH-RES_PLOT_2022-10-04.png")
include_graphics("prsice_images/TERRE_PRSice_nalls_female_QUANTILES_PLOT_2022-10-04.png")
```
## Plotting PRSice Data on my own
```{r}
library(ggnewscale)
prsice_male_meta <- fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.prsice")
ggplot(prsice_male_meta[Threshold <= 0.5], aes(Threshold, Num_SNP, color = -log10(P))) +
  geom_point() +
  scale_y_continuous(breaks = c(seq(0, 1e5, 2.5e4), seq(2e5, 6e5, 1e5))) +
  theme_minimal()

prsice_female_meta <- fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.prsice")
ggplot(prsice_female_meta[Threshold <= 0.5], aes(Threshold, Num_SNP, color = -log10(P))) +
  geom_point() +
  scale_y_continuous(breaks = c(seq(0, 1e5, 2.5e4), seq(2e5, 6e5, 1e5))) +
  theme_minimal()

prsice_meta <- fread("prsice_data/TERRE_PRSice.prsice")
ggplot(mapping = aes(Threshold, R2, color = -log10(P))) +
  geom_point(data = prsice_female_meta, size = 1) +
  scale_color_gradient(low = "lightpink4", high = "lightpink") +
  labs(color = bquote("Female log"["10"] ~ "(P)")) +
  new_scale_color() +
  geom_point(data = prsice_male_meta, size = 1, aes(color = -log10(P))) +
  scale_color_gradient(low = "lightblue4", high = "lightblue") +
  labs(y = bquote("R"^2), x = "GWAS P-Value Threshold", color = bquote("Male -log"["10"] ~ "(P)")) +
  theme_minimal() +
  theme(legend.position = "top")
ggplot(mapping = aes(Threshold, R2, color = -log10(P))) +
  geom_point(data = prsice_female_meta, size = 1) +
  scale_color_gradient(low = "lightpink4", high = "lightpink",guide = guide_colorbar(order=3)) +
  labs(color = bquote("Female -log"["10"] ~ "(P)")) +
  new_scale_color() +
  geom_point(data = prsice_male_meta, size = 1, aes(color = -log10(P))) +
  scale_color_gradient(low = "lightblue4", high = "lightblue",guide = guide_colorbar(order=2)) +
  labs(color = bquote("Male -log"["10"] ~ "(P)")) +
  new_scale_color() +
  geom_point(data = prsice_meta, size = 1, aes(color = -log10(P))) +
  scale_color_gradient(low = "gray40", high = "gray80",guide = guide_colorbar(order=1)) +
  labs(y = bquote("R"^2), x = "GWAS P-Value Threshold", color = bquote("Cross-sex -log"["10"] ~ "(P)")) +
  theme_minimal() +
  theme(legend.position = "top",legend.title = element_text(size=7))
```

# Step 3 run linear model at different thresholds for SNP inclusion
This is updated to recently processed DNAm Data 20 outliers in DNAm removed:
```{r}
load("/home1/NEURO/SHARE_DECIPHER/processed_DNAm_data/2022/TERRE_processed_2022/1-TERRE_RG_filtered.RData") #PD_RG_filtered
# Assign genotyping ID to data
original_covars <- fread("/home1/NEURO/SHARE_DECIPHER/terre_meta_master.csv")[, .(patient, IID = gsub("_PAE.*", "", IID))]
betas_combat <- minfi::getBeta(PD_RG_filtered)
colnames(betas_combat) <- original_covars$IID[match(colnames(betas_combat), original_covars$patient)]
betas_combat <- betas_combat[, colnames(betas_combat) %in% covariate$IID]
```
Let's check how the data looks for the first 5 subjects:
```{r}
ggplot(betas_combat[, 1:5] %>% as.data.table(keep.rownames = T) %>% melt(id.vars = "rn", value.name = "betas", variable.name = "subject"), aes(betas, color = subject))+
  geom_density()
```

### Match DNA, PRS, and metadata @TODO fix this after update of PRS etc.
```{r}
prsice_best <- fread("prsice_data/TERRE_PRSice.best")[match(colnames(betas_combat), IID,nomatch=0),.(best=PRS)]
prsice_all <- cbind(
  fread("prsice_data/TERRE_PRSice.all_score")[match(colnames(betas_combat), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
  prsice_best
)
covariate <- covariate[match(colnames(betas_combat), IID)]
all(covariate$IID == colnames(betas_combat))
all(covariate$IID == prsice_all$IID)

covariate_male <- covariate[sex == 1] %>% select(-sex)
betas_male <- betas_combat[, covariate_male$IID]
prsice_male_best <- fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.best")[match(colnames(betas_male), IID,nomatch=0),.(best=PRS)]
prsice_male_all <- cbind(
  fread("prsice_nalls_male_data/TERRE_PRSice_nalls_male.all_score")[match(colnames(betas_male), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
  prsice_male_best
)
covariate_male <- covariate_male[match(colnames(betas_male), IID,nomatch=0)]
all(covariate_male$IID == colnames(betas_male))
all(covariate_male$IID == prsice_male_all$IID)

prsice_female_best <- fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.best")
covariate_female <- covariate[sex == 0] %>% select(-sex) %>% filter(IID %in% prsice_female_best$IID)
betas_female <- betas_combat[, covariate_female$IID]
prsice_female_best <- prsice_female_best[match(colnames(betas_female), IID,nomatch=0),.(best=PRS)]
prsice_female_all <- cbind(
  fread("prsice_nalls_female_data/TERRE_PRSice_nalls_female.all_score")[match(colnames(betas_female), IID,nomatch=0), .(FID, IID, `Pt_5e-08`, `Pt_5.005e-05`, `Pt_0.00010005`, `Pt_0.00100005`, `Pt_0.0101501`, `Pt_0.1`, `Pt_0.2`, `Pt_0.3`, `Pt_0.4`, `Pt_0.5`, `Pt_1`)],
  prsice_female_best
)
covariate_female <- covariate_female[match(colnames(betas_female), IID,nomatch=0)]
all(covariate_female$IID == colnames(betas_female))
all(covariate_female$IID == prsice_female_all$IID)


```
### Run limma

```{r}
mvalues <- lumi::beta2m(betas_combat)
prs_mat <- prsice_all[, -c(1, 2)]
cov_mat <- covariate[, -c(1, 2)]

mvalues_male <- lumi::beta2m(betas_male)
prs_mat_male <- prsice_male_all[, -c(1, 2)]
cov_mat_male <- covariate_male[, -c(1, 2)]

mvalues_female <- lumi::beta2m(betas_female)
prs_mat_female <- prsice_female_all[, -c(1, 2)]
cov_mat_female <- covariate_female[, -c(1, 2)]
```

```{r}
registerDoParallel(ncol(prs_mat) / 4)
hits <- foreach(prs_thresh = colnames(prs_mat)) %dopar% {
  design_prs <- model.matrix(~., data = cbind(prs_mat[, ..prs_thresh], cov_mat))
  prs_fit <- lmFit(mvalues, design_prs)
  prs_fit <- eBayes(prs_fit)
  topTable(prs_fit, coef = 2, adjust.method = "bonf", p.value = 0.05, number = Inf, genelist = rownames(mvalues))
}
names(hits) <- colnames(prs_mat)
hits_by_thresh_bonf <- rbindlist(hits, idcol = "threshold", fill = TRUE)

registerDoParallel(ncol(prs_mat_male) / 4)
hits_male <- foreach(prs_thresh = colnames(prs_mat_male)) %dopar% {
  design_prs_male <- model.matrix(~., data = cbind(prs_mat_male[, ..prs_thresh], cov_mat_male))
  prs_fit_male <- lmFit(mvalues_male, design_prs_male)
  prs_fit_male <- eBayes(prs_fit_male)
  topTable(prs_fit_male, coef = 2, adjust.method = "bonf", p.value = 0.05, number = Inf, genelist = rownames(mvalues_male))
}
names(hits_male) <- colnames(prs_mat_male)
hits_by_thresh_bonf_male <- rbindlist(hits_male, idcol = "threshold", fill = TRUE)

registerDoParallel(ncol(prs_mat_female) / 4)
hits_female <- foreach(prs_thresh = colnames(prs_mat_female)) %dopar% {
  design_prs_female <- model.matrix(~., data = cbind(prs_mat_female[, ..prs_thresh], cov_mat_female))
  prs_fit_female <- lmFit(mvalues_female, design_prs_female)
  prs_fit_female <- eBayes(prs_fit_female)
  topTable(prs_fit_female, coef = 2, adjust.method = "bonf", p.value = 0.05, number = Inf, genelist = rownames(mvalues_female))
}
names(hits_female) <- colnames(prs_mat_female)
hits_by_thresh_bonf_female <- rbindlist(hits_female, idcol = "threshold", fill = TRUE)

```

### Plotting EWAS vs Threshold Experiment by Sex
```{r}
to_plot <- rbind(
  hits_by_thresh_bonf[, .(hits = .N, Sex = "Cross-sex"), by = threshold] %>%
    mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0")),
  hits_by_thresh_bonf_male[, .(hits = .N, Sex = "Male"), by = threshold] %>%
    mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0")),
  hits_by_thresh_bonf_female[, .(hits = .N, Sex = "Female"), by = threshold] %>%
    mutate(threshold = recode_factor(threshold, `Pt_0.0219001` = "0.0219", `Pt_5e-08` = "5e-8", `Pt_5.005e-05` = "5e-5", `Pt_0.00010005` = "1e-4", `Pt_0.00100005` = "1e-3", `Pt_0.0101501` = "1e-2", `Pt_0.1` = "0.1", `Pt_0.2` = "0.2", `Pt_0.3` = "0.3", `Pt_0.4` = "0.4", `Pt_0.5` = "0.5", `Pt_1` = "1.0"))
) %>% mutate(Sex = factor(Sex, levels = c("Cross-sex", "Male", "Female")))
plot_pos <- position_dodge(width = 1)
ggplot(to_plot, aes(threshold, hits, fill = Sex, label = hits)) +
  geom_text(position = plot_pos, vjust = -0.25) +
  geom_col(position = plot_pos) +
  labs(x = "GWAS P Value Threshold", y = "EWAS Hits") +
  scale_fill_manual(values = c("grey80", "lightblue", "lightpink")) +
  theme_minimal()
hits_by_thresh_bonf[, .(hits = .N, Sex = "Cross-sex"), by = threshold]
hits_by_thresh_bonf_male[, .(hits = .N), by = threshold]
hits_by_thresh_bonf_female[, .(hits = .N), by = threshold]
```
```{r}
display_venn <- function(x, ...) {
  library(VennDiagram)
  grid.newpage()
  venn_object <- venn.diagram(x, filename = NULL, ...)
  grid.draw(venn_object)
}

display_venn(list(`Cross-sex` = hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID, Male = hits_by_thresh_bonf_male[threshold == "Pt_5e-08"]$ID, Female = hits_by_thresh_bonf_female[threshold == "Pt_5e-08"]$ID), fill = c("gray80", "lightblue", "lightpink"))
male_ids <- hits_by_thresh_bonf_male[threshold == "Pt_5e-08"]$ID
cross_ids <- hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID
male_ids[!male_ids %in% cross_ids]
```

```{r}
get_full_fit <- function(prs_mat,cov_mat,mvalues){
  top_design_prs <- model.matrix(~., data = cbind(prs_mat[, `Pt_5e-08`], cov_mat))
  top_prs_fit <- lmFit(mvalues, top_design_prs)
  top_prs_fit <- eBayes(top_prs_fit)
  top_prs_hits <- topTable(top_prs_fit, coef = 2, adjust.method = "bonf", number = Inf, genelist = rownames(mvalues))
}
top_prs_hits <- get_full_fit(prs_mat,cov_mat,mvalues)
top_male_prs_hits <- get_full_fit(prs_mat_male, cov_mat_male, mvalues_male)
top_female_prs_hits <- get_full_fit(prs_mat_female, cov_mat_female, mvalues_female)
save(list=c("top_prs_hits","top_male_prs_hits","top_female_prs_hits"),file="prs_nalls_cross_w_sex_stratified.RData")
```

```{r}
load("prs_nalls_cross_w_sex_stratified.RData")
```
```{r}
manifest <- IlluminaHumanMethylationEPICanno.ilm10b4.hg19::Other %>%
  as.data.frame() %>%
  rownames_to_column(var = "name")
prs_annot <- data.table(top_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
prs_annot_male <- data.table(top_male_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
prs_annot_female<- data.table(top_female_prs_hits)[manifest, gene := gsub(";.*", "", UCSC_RefGene_Name), on = c(ID = "name")]
plot_prs_hits <- function(prs_annot,label_color){
  ggplot(prs_annot, aes(logFC, -log10(P.Value))) +
    geom_point() +
    geom_point(
      data = subset(prs_annot, adj.P.Val < 0.05 & abs(logFC) > 0.03),
      color = label_color,
      mapping = aes(logFC, -log10(P.Value))
    ) +
    geom_hline(
      linetype = "dashed",
      yintercept = min(-log10(prs_annot$P.Value[prs_annot$adj.P.Val < 0.05]))
    ) +
    geom_vline(linetype = "dashed", xintercept = 0.03) +
    geom_vline(linetype = "dashed", xintercept = -0.03) +
    geom_text_repel(
      data = prs_annot %>% filter(abs(logFC) > 0.03 & adj.P.Val < 0.05),
      color = "dodgerblue",
      mapping = aes(logFC, -log10(P.Value), label = ifelse(gene != "", gene, ID)),
      size = 3,
      max.overlaps = 20
    ) +
    labs(y = bquote("log"[10] ~ "(P)"), x = quote(Delta ~ "M" ~ Methylation)) +
    theme_minimal()
}

plot_prs_hits(prs_annot,"gray40")
plot_prs_hits(prs_annot_male,"lightblue")
plot_prs_hits(prs_annot_female,"pink")
```


```{r}
prs_plot_data <- data.table(dnam = lumi::m2beta(mvalues["cg12609785",]), prs = prs_mat$`Pt_5e-08`,PD=ifelse(PD[match(colnames(mvalues),IID),]$PD == 1,"Case","Control"),Sex=factor(ifelse(cov_mat$sex ==1,"Male","Female"),levels=c("Male","Female")))
ggplot(prs_plot_data,aes(prs,dnam,color=Sex,shape = PD,group=Sex)) +
  geom_point(size=3)+
  geom_smooth(method="lm",se=F)+
  labs(y=bquote("Methylation"~beta),x="Parkinson's GRS")+
  scale_color_manual(values=c("Male"="lightblue","Female"="lightpink")) + 
  theme_minimal(base_size=20)
```

## go enrichment?
```{r,eval=FALSE}
library(gprofiler2)
cross_sex <- unique(gsub(";.*","",manifest[manifest$name %in% hits_by_thresh_bonf[threshold == "Pt_5e-08"]$ID,]$UCSC_RefGene_Name))
background <- unique(gsub(";.*","",manifest$UCSC_RefGene_Name))
gost_res <- gost(query=cross_sex,custom_bg = background)
gostplot(gost_res)
```
```{r}
cpg_pos <- IlluminaHumanMethylationEPICanno.ilm10b4.hg19::Locations %>%
  as.data.frame() %>%
  rownames_to_column(var = "name")
male_annot_cpg <- (prs_annot_male %>% filter(adj.P.Val < 0.25) %>% left_join(cpg_pos,by=c("ID"="name")) %>% group_by(chr) %>% top_n(-10,P.Value))$ID
female_annot_cpg <- (prs_annot_female %>% filter(adj.P.Val < 0.25) %>% left_join(cpg_pos,by=c("ID"="name")) %>% group_by(chr) %>% top_n(-10,P.Value))$ID
library(hudson)
options(ggrepel.max.overlaps = Inf)
gmirror(
  prs_annot_male[,.(SNP=ID,CHR=gsub("chr","",cpg_pos[match(ID,cpg_pos$name),]$chr),POS=cpg_pos[match(ID,cpg_pos$name),]$pos,pvalue=P.Value)],
  prs_annot_female[,.(SNP=ID,CHR=gsub("chr","",cpg_pos[match(ID,cpg_pos$name),]$chr),POS=cpg_pos[match(ID,cpg_pos$name),]$pos,pvalue=P.Value)],
  annotate_snp = c(male_annot_cpg,female_annot_cpg),
  tline =  max(prs_annot_male[adj.P.Val<0.25]$P.Value),
  bline =  max(prs_annot_female[adj.P.Val<0.25]$P.Value),
  highlight_p = c(max(prs_annot_male[adj.P.Val<0.25]$P.Value),max(prs_annot_female[adj.P.Val<0.25]$P.Value)),
  toptitle="Male",
  bottomtitle ="Female",
  highlighter="green",
  background="white"
)

```

# Manhattan plot vs GWAS manhattan plot
```{r,eval=FALSE}
library(qqman)

copy_annot <- prs_annot[cpg_pos, on = c(ID = "name")] %>%
  mutate(chr = as.numeric(recode(gsub("chr", "", chr),X="23",Y="24")))
to_plot <- copy_annot[, .(SNP = gene, CHR = chr, BP = pos, P = P.Value, FDR = adj.P.Val)][!is.na(P)]
manhattan(to_plot,
  annotatePval = max(to_plot[FDR < 0.05]$P),
  annotateTop = TRUE,
  genomewideline = min(-log10(to_plot[FDR < 0.05]$P)),
  suggestiveline = FALSE
)
manhattan(to_plot[CHR == 17],
  annotatePval = max(to_plot[FDR < 0.05]$P),
  annotateTop = FALSE,
  genomewideline = min(-log10(to_plot[FDR < 0.05]$P)),
  suggestiveline = FALSE
)
qq(to_plot$P)
```


## DMRs

```{r}
library(DMRcate)
S4_to_dataframe <- function(s4obj) {
  nms <- slotNames(s4obj)
  lst <- lapply(nms, function(nm) slot(s4obj, nm))
  as.data.frame(setNames(lst, nms))
}
run_dmrcate <- function(prs_mat,cov_mat,mvalues){
  design_prs <- model.matrix(~., data = cbind(prs_mat[, `Pt_5e-08`], cov_mat))
  prs_annotated <- cpg.annotate(datatype = "array", object = mvalues, analysis.type = "differential", design = design_prs, coef = 2, what = "M", arraytype = "EPIC", fdr = 0.05)
  prs_dmr_res <- dmrcate(prs_annotated, lambda = 1000, C = 2)
  return(S4_to_dataframe(prs_dmr_res))
}
dmr_cross <- run_dmrcate(prs_mat, cov_mat, mvalues)
dmr_males <- run_dmrcate(prs_mat_male,cov_mat_male,mvalues_male)
dmr_female <- run_dmrcate(prs_mat_female,cov_mat_female,mvalues_female)
```
```{r}
annotation <- minfi::getAnnotation(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
```
### Plotting all DMRs {.tabset .tabset-fade}
```{r}
get_dmr_effects <- function(dmr, limma_res,mvalues) {
  dmr_coord <- str_match_all(as.character(dmr), "(chr.*):([0-9]*)-([0-9]*)")[[1]]
  cpgs <- as.data.table(annotation[annotation$chr == dmr_coord[2] & annotation$pos >= as.numeric(dmr_coord[3]) & annotation$pos <= as.numeric(dmr_coord[4]), ])
  res <- limma_res[cpgs,on=c("ID"="Name"),nomatch=0 ]
  dmr_1 <- as.data.frame(res[!is.na(res$logFC), ])
  dmr_methy <- reshape2::melt(lumi::m2beta(mvalues[dmr_1$ID, ]), stringsAsFactors = FALSE)
  to_plot <- merge(dmr_1, dmr_methy, by.x = "ID", by.y = "Var1")
  to_plot$DMR <- dmr
  to_plot
}
get_dmr_res <- function(dmr, limma_res) {
  dmr_coord <- str_match_all(as.character(dmr), "(chr.*):([0-9]*)-([0-9]*)")[[1]]
  cpgs <- annotation[annotation$chr == dmr_coord[2] & annotation$pos >= as.numeric(dmr_coord[3]) & annotation$pos <= as.numeric(dmr_coord[4]), ]
  res <- cbind(cpgs, limma_res[cpgs$Name, ])
  res$DMR <- dmr
  return(res)
}
plot_dmrs <- function(dmr_res,limma_res,prs_mat, mvalues, case_control) {
  to_plot <- rbindlist(lapply(dmr_res$coord, function(dmr) get_dmr_effects(dmr, limma_res,mvalues))) %>%
    mutate(SCORE1_AVG = scale(prs_mat[match(Var2,IDs$IID),`Pt_5e-08`]),PD = PD[match(Var2,IID)]$PD) %>%
    filter(!is.na(SCORE1_AVG))
  for(cur_dmr in unique(to_plot$DMR)){
    cur_plot <- to_plot[DMR == cur_dmr]
    p1 <- ggplot(cur_plot , aes(pos, value, color = SCORE1_AVG)) +
      geom_point() +
      scale_color_gradient(low = rev(case_control)[1], high = rev(case_control)[2]) +
      theme_minimal() +
      labs(title = unique(cur_plot$DMR),x="POS",y=bquote("Methylation"~beta),color = "Normalized PD PRS")+
      theme(axis.text.x = element_text(angle = 90))
    p2 <- ggplot(cur_plot %>% mutate(PD = ifelse(PD == 1, "CASE", "CONTROL")), aes(factor(pos), value, color = PD)) +
      geom_boxplot(position = position_dodge(0.75)) +
      scale_color_manual(values = case_control) +
      theme_minimal() +
      stat_summary(aes(group = PD), fun = mean, geom = "line") +
      labs(title = unique(cur_plot$DMR),x="POS",y=bquote("Methylation"~beta),color = "PD status") +
      theme(axis.text.x = element_text(angle = 90))
    print(p1)
    print(p2)
  }
}

plot_dmrs(dmr_cross, prs_annot, prs_mat,mvalues, rev(c("gray80", "gray40")))
plot_dmrs(dmr_males,prs_annot_male,prs_mat_male, mvalues_male,rev(c("light blue", "lightblue4")))
plot_dmrs(dmr_female, prs_annot_female, prs_mat_female, mvalues_female,rev(c("lightpink", "lightpink4")))
```


```{r}
dmr_cross
dmr_males
dmr_female
save(list=c("dmr_cross","dmr_males","dmr_female"),file="prs_dmr_nalls.RData")
```
